libinstall <- function(pkg, lib=TRUE) {
if(!require(pkg, character.only = T))install.packages(pkg)
if(lib)library(pkg, character.only = T)
}
# https://rstudio.github.io/distill/tables.html
libinstall("rmarkdown")
libinstall("kableExtra")
disp=function(tbl, nhead=0, ntail=0, style=paged_table){
if(is.null(style))style=function(t){
kbl(t)%>%
style()%>%
return()
}
if(isTRUE(getOption('knitr.in.progress'))){
if(nhead!=0)tbl=head(tbl, n=nhead)
if(ntail!=0)tbl=tail(tbl, n=ntail)
return(
tbl%>%
style()
)
}
return(tbl)
}
libinstall("tidyverse")
libinstall("glue")
libinstall("readr")
libinstall("plotly")
libinstall("readr")
libinstall("readxl")
libinstall("lubridate")
libinstall("curl")
libinstall("epidata")
if(!curl::has_internet())quit()
# Download cpsaat data
tmp <- tempfile()
curl_download("https://www.bls.gov/cps/cpsaat11.xlsx", destfile = tmp)
# Import cpsaat
cpsaat11 <- read_excel(
tmp,
col_names = c(
"Occupation",
"Total",
"Women",
"White",
"Black/African American",
"Asian",
"Hispanic/Latino"
),
na = "–",
col_types = c(
Occupation="text",
Total="numeric",
"Women"="numeric",
"White"="numeric",
"Black/African American"="numeric",
"Asian"="numeric",
"Hispanic/Latino"="numeric"
),
skip = 7
)%>%
drop_na(Occupation)
file.remove(tmp)
## [1] TRUE
rm(tmp)
Get the data at EPI
Labor_force_participation <- epidata::get_labor_force_participation_rate(by = "gr")
Medianaverage_hourly_wages <- epidata::get_median_and_mean_wages(by = "gr")
Minimum_wage <- epidata::get_minimum_wage()
cpsaat11%>%disp()
Looks fine.
Labor_force_participation%>%disp()
Participation=Labor_force_participation%>%
pivot_longer(-c(date), names_to = "Race", values_to = "Participation", values_drop_na = T)%>%
separate(Race, into = c("Race", "Gender"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
Participation=Participation%>%
filter(grepl("women|men", Race, ignore.case = T))%>%
mutate(
Gender=Race,
Race=NA_character_
)%>%
union(
Participation%>%
filter(!grepl("women|men", Race, ignore.case = T))
)
Participation%>%
filter(!is.na(Race))
## # A tibble: 5,020 x 4
## date Race Gender Participation
## <date> <chr> <chr> <dbl>
## 1 1978-12-01 all <NA> 0.634
## 2 1978-12-01 black <NA> 0.617
## 3 1978-12-01 black women 0.535
## 4 1978-12-01 black men 0.718
## 5 1978-12-01 hispanic <NA> 0.633
## 6 1978-12-01 hispanic women 0.47
## 7 1978-12-01 hispanic men 0.812
## 8 1978-12-01 white <NA> 0.635
## 9 1978-12-01 white women 0.499
## 10 1978-12-01 white men 0.785
## # ... with 5,010 more rows
rm(Labor_force_participation)
Minimum_wage%>%disp()
#adjust for inflation to get to common 2019
Minimum_wage=Minimum_wage%>%
mutate(
Min2019=priceR::adjust_for_inflation(
federal_minimum_wage_real_x_2018_dollars,
2018,
"US",
2019
)
)
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US
## Generating URL to request all 61 results
Minimum_wage=Minimum_wage%>%
rename(MinCur=federal_minimum_wage_nominal_dollars)%>%
select(Min2019, MinCur, date)
Wages=Wages%>%
rename(
Date=date,
Median=median,
Average=average
)
Participation=Participation%>%
rename(Date=date)
Minimum_wage=Minimum_wage%>%
rename(Date=date)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Average))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Median))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot()+
geom_point(aes(x=Median, y=Average, col=Race, shape=Gender, frame=Date))+
ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
ggplotly(g)